###################################################################################################
########################                  read in data             ################################
###################################################################################################

library(amap)
cccol <- c("#CE0013","#16557A","#C7A609","#87C232","#64C0AB","#A14C94","#15A08C","#8B7E75","#1E7CAF","#EA425F","#46489A","#E50033","#0F231F","#1187CD")
cccol50 <- c("#CE001330","#16557A30","#C7A60950","#87C23250","#64C0AB50","#A14C9450","#15A08C50","#8B7E7550","#1E7CAF50","#EA425F50","#46489A50","#E5003350","#0F231F50","#1187CD50")
data <- read.table("H3K4me3_H3K27me3_bivalent_sort.txt",row.names=1)
colnames(data) <- c("hiF","d6","n24","niPS","p24","piPS")

bivalent_data <- data
bivalent_data[bivalent_data==1] <- 0
bivalent_data[bivalent_data==2] <- 0
bivalent_data[bivalent_data==3] <- 1

write.table(bivalent_data,file="bivalent.txt",quote=F,sep="\t",col.names=F)
bivalent_data <- read.table("bivalent_sort.txt",row.names=1)
colnames(bivalent_data) <- c("hiF","d6","n24","niPS","p24","piPS")

############ 2nd naive RNAseq
logfpkm2nd <- read.table("../data/2nd.reprogramming.lg2.all.fpkm.txt",header=T,row.names=1)
n_path <- c("hiF_r1","hiF_r2","he0_r1","he0_r2","he2_r1","he2_r2","he6_r1","he6_r2","n8_r1","n8_r2","n12_r1","n12_r2","n14_r1","n14_r2","n20_r1","n20_r2","n24m_r1","n24m_r2","n24p_r1","n24p_r2","niPS_r1","niPS_r2")
nData_tmp <- logfpkm2nd[,n_path]
nfpkm2nd <- 2**nData_tmp - 1

n_time_point <- c("hiF","he0","he2","he6","n8","n12","n14","n20","n24p","n24m","niPS")
n_label <- c("hiF-T","0d","2d","6d","8d","12d","14d","20d","24d+dox","24d-dox","niPSC-T")
nData2ndfpkm <- cbind(apply(nfpkm2nd[,1:2],1,mean),apply(nfpkm2nd[,3:4],1,mean),apply(nfpkm2nd[,5:6],1,mean),apply(nfpkm2nd[,7:8],1,mean),apply(nfpkm2nd[,9:10],1,mean),apply(nfpkm2nd[,11:12],1,mean),apply(nfpkm2nd[,13:14],1,mean),apply(nfpkm2nd[,15:16],1,mean),apply(nfpkm2nd[,17:18],1,mean),apply(nfpkm2nd[,19:20],1,mean),apply(nfpkm2nd[,21:22],1,mean))
colnames(nData2ndfpkm) <- n_time_point
rownames(nData2ndfpkm) <- rownames(nfpkm2nd)

n_sd <- cbind(apply(nfpkm2nd[,1:2],1,sd),apply(nfpkm2nd[,3:4],1,sd),apply(nfpkm2nd[,5:6],1,sd),apply(nfpkm2nd[,7:8],1,sd),apply(nfpkm2nd[,9:10],1,sd),apply(nfpkm2nd[,11:12],1,sd),apply(nfpkm2nd[,13:14],1,sd),apply(nfpkm2nd[,15:16],1,sd),apply(nfpkm2nd[,17:18],1,sd),apply(nfpkm2nd[,19:20],1,sd),apply(nfpkm2nd[,21:22],1,sd))
colnames(n_sd) <- n_time_point
rownames(n_sd) <- rownames(nfpkm2nd)
n_log_sd <- cbind(apply(nData_tmp[,1:2],1,sd),apply(nData_tmp[,3:4],1,sd),apply(nData_tmp[,5:6],1,sd),apply(nData_tmp[,7:8],1,sd),apply(nData_tmp[,9:10],1,sd),apply(nData_tmp[,11:12],1,sd),apply(nData_tmp[,13:14],1,sd),apply(nData_tmp[,15:16],1,sd),apply(nData_tmp[,17:18],1,sd),apply(nData_tmp[,19:20],1,sd),apply(nData_tmp[,21:22],1,sd))
colnames(n_log_sd) <- n_time_point
rownames(n_log_sd) <- rownames(nfpkm2nd)

############ 2nd primed RNAseq
pData2ndfpkm <- read.table("../data/paper.primed.fpkm.txt",header=T,row.names=1)

nData <- log2(nData2ndfpkm+1)
pData <- log2(pData2ndfpkm+1)
common_time_point <- c("hiF-T","2d","6d","8d","14d","20d","24d+dox","24d-dox","iPSC-T")
colnames(nData2ndfpkm) <- n_time_point
rownames(nData2ndfpkm) <- rownames(nfpkm2nd)

###################################################################################################
########################                      plot                 ################################
###################################################################################################

k <- 6
set.seed(9)
km <- Kmeans(data,k,method = "correlation")
# km <- kmeans(data,k)

plot_matrix <- bivalent_data
pdf("bivalency_change_pattern.pdf",width=3.6,height=3.6)
gene_sort <- c()
clusterBoundary <- c()
tmp_boundary_sum <- 0
for (each in seq(k)){
     modGenes = names(which(km$cluster==each))
     v1 = apply(plot_matrix[modGenes,],2,mean)
     plot(v1[1:4],lwd=3,type="l",col=cccol[1],main=paste("cluster_",each,"--",length(modGenes),"genes ",sep=""),ylim=c(0,1),xlab=NA,ylab="bivalency",xaxt="n")
     axis(side=1,1:4,c("hiF-T","6d","24d+dox","iPSC-T"),las=2);axis(side=2);box()
     points(v1[c(1,2,5,6)],lwd=3,type="l",col=cccol[2])
     gene_sort <- c(gene_sort,modGenes)
     tmp_boundary_sum <- tmp_boundary_sum+length(which(km$cluster==each))
     clusterBoundary <- c(clusterBoundary,tmp_boundary_sum)
}
dev.off()

pdf("expression_change_pattern.pdf",width=3.6,height=3.6)
common_time_point <- c("hiF-T","2d","6d","8d","14d","20d","24d+dox","24d-dox","iPSC-T")
for (each in seq(k)){
     modGenes = intersect(intersect(names(which(km$cluster==each)),row.names(nData)),row.names(pData))
     write.table(cbind(modGenes),file=paste(each,"bivalency_genes.txt",sep=""),quote=F,sep="\t",col.names=F,row.names=F)
     v1 = apply(nData[modGenes,c("hiF","he2","he6","n8","n14","n20","n24p","n24m","niPS")],2,mean)
     n <- length(modGenes)
     sd <- apply(nData[modGenes,c("hiF","he2","he6","n8","n14","n20","n24p","n24m","niPS")],2,sd)
     alpha <- 0.05
     v2 = v1 - sd/sqrt(n)*qt(1-alpha/2,n-1)
     v3 = v1 + sd/sqrt(n)*qt(1-alpha/2,n-1)
     plot(v1,lwd=3,type="l",col=cccol[1],xlab=NA,ylab="Log2(FPKM+1)",xaxt="n",ylim=c(0,4))
     axis(side=1,1:length(common_time_point),common_time_point,las=2);axis(side=2);box()
     polygon(c(1,1:length(common_time_point),length(common_time_point):2),c(v2[1],v3,v2[length(common_time_point):2]),col=adjustcolor("grey", alpha.f = 0.4),border=NA)
     v1 = apply(pData[modGenes,c("hiFT","d2","d5","d8","d14","d20","d24p","d24m","hiPST")],2,mean)
     n <- length(modGenes)
     sd <- apply(pData[modGenes,c("hiFT","d2","d5","d8","d14","d20","d24p","d24m","hiPST")],2,sd)
     alpha <- 0.05
     v2 = v1 - sd/sqrt(n)*qt(1-alpha/2,n-1)
     v3 = v1 + sd/sqrt(n)*qt(1-alpha/2,n-1)
     points(v1,lwd=3,type="l",col=cccol[2],lty=2)
     polygon(c(1,1:length(common_time_point),length(common_time_point):2),c(v2[1],v3,v2[length(common_time_point):2]),col=adjustcolor("grey", alpha.f = 0.4),border=NA)

}
dev.off()

plot_data <- data[gene_sort,c("piPS","p24","d6","hiF","d6","n24","niPS")]
pdf("bivalency_change_heatmap.pdf",width=3.6,height=8)
ColorRamp <- c("grey",cccol50[1],cccol50[4],cccol[2])
ColorLevels <- c(0,1,2,3)
par(mar=c(4,4,2,2))
layout(matrix(c(rep(1,6),2),nrow=7,ncol=1))
image(1:ncol(plot_data), 1:nrow(plot_data), t(plot_data), xaxt="n", yaxt="n", col=ColorRamp, xlab="", ylab="")
axis(side=1,1:ncol(plot_data),labels=c("piPSC-T","p24d","6d","hiF-T","6d","n24d","niPSC-T"),cex.axis=1.2,las=2);box()
abline(h=clusterBoundary+0.5,lwd=2,lty=2)
image(ColorLevels,1,matrix(data=ColorLevels, nrow=length(ColorLevels),ncol=1),col=ColorRamp, xlab="",ylab="",cex.axis=1,xaxt="n",yaxt="n")
axis(side=1,c(0,1,2,3),labels=c("NonMarked","H3K4me3","H3K27me3","Bivalent"))

plot_data <- data[gene_sort,c("piPS","p24","d6","hiF","d6","n24","niPS")]
ColorRamp <- c("grey",cccol50[1],cccol50[4],cccol[2])
ColorLevels <- c(0,1,2,3)
par(mar=c(6,1,2,1))
layout(matrix(c(rep(1,2),rep(2,3),rep(3,2)),ncol=7,byrow=T))
image(1:ncol(plot_data[,c(1,2)]),1:nrow(plot_data), t(plot_data[,c(1,2)]), xaxt="n", yaxt="n", col=ColorRamp, xlab="", ylab="")
abline(h=clusterBoundary+0.5,lwd=2,lty=2)
image(1:ncol(plot_data[,c(3,4,5)]),1:nrow(plot_data), t(plot_data[,c(3,4,5)]), xaxt="n", yaxt="n", col=ColorRamp, xlab="", ylab="")
abline(h=clusterBoundary+0.5,lwd=2,lty=2)
image(1:ncol(plot_data[,c(6,7)]),1:nrow(plot_data), t(plot_data[,c(6,7)]), xaxt="n", yaxt="n", col=ColorRamp, xlab="", ylab="")
abline(h=clusterBoundary+0.5,lwd=2,lty=2)
dev.off()
